Google Maps Email Scraper Template
工作流概述
这是一个包含26个节点的工作流:根据你提供的查询列表搜索 Google Maps,从搜索结果中提取商家网站,抓取页面中的电子邮件地址,去重、过滤后保存到 Google Sheets。
工作流源代码
{
"name": "Google Maps Email Scraper Template",
"tags": [],
"nodes": [
{
"id": "79df5316-c210-478d-a4de-35b5d31924ee",
"name": "Remove Duplicate URLs",
"type": "n8n-nodes-base.removeDuplicates",
"position": [
-780,
380
],
"parameters": {},
"typeVersion": 1.1
},
{
"id": "985ac7e3-b501-4079-a043-780677c94b52",
"name": "Loop over queries",
"type": "n8n-nodes-base.splitInBatches",
"position": [
-1080,
-100
],
"parameters": {
"options": {}
},
"typeVersion": 3
},
{
"id": "3a478935-781b-4fb1-bdc7-fcf8be1334bc",
"name": "Search Google Maps with query",
"type": "n8n-nodes-base.httpRequest",
"position": [
-1380,
380
],
"parameters": {
"url": "=https://www.google.com/maps/search/{{ $json.query }}",
"options": {
"allowUnauthorizedCerts": false
}
},
"executeOnce": false,
"typeVersion": 4.2,
"alwaysOutputData": false
},
{
  "id": "477e7d55-b7d6-4b20-ac44-dd1f443e270a",
  "name": "Scrape URLs from results",
  "type": "n8n-nodes-base.code",
  "position": [
    -1180,
    380
  ],
  "parameters": {
    "jsCode": "// Response body of the upstream HTTP Request node.\nconst data = $input.first().json.data\n// Match scheme and host only (everything up to the first path slash).\nconst regex = /https?:\\/\\/[^\\/]+/g\n// match() returns null when nothing matches, and data is not a string when the\n// request produced no text body; fall back to an empty list instead of throwing.\nconst urls = (typeof data === 'string' ? data.match(regex) : null) || []\nreturn urls.map(url => ({json: {url: url}}))"
  },
  "typeVersion": 2
},
{
  "id": "a5b67e45-a3f6-41d2-aa58-c26a441c41b2",
  "name": "Filter irrelevant URLs",
  "type": "n8n-nodes-base.filter",
  "position": [
    -980,
    380
  ],
  "parameters": {
    "options": {},
    "conditions": {
      "options": {
        "version": 2,
        "leftValue": "",
        "caseSensitive": true,
        "typeValidation": "strict"
      },
      "combinator": "and",
      "conditions": [
        {
          "id": "041797f2-2fe2-41dc-902a-d34050b9b304",
          "operator": {
            "type": "string",
            "operation": "notRegex"
          },
          "leftValue": "={{ $json.url }}",
          "rightValue": "=(google|gstatic|ggpht|schema\\.org|example\\.com|sentry-next\\.wixpress\\.com|imli\\.com|sentry\\.wixpress\\.com|ingest\\.sentry\\.io)"
        }
      ]
    }
  },
  "typeVersion": 2.2
},
{
"id": "40ec6d1f-1c98-4c9f-8499-c5893c3df7b9",
"name": "Request web page for URL",
"type": "n8n-nodes-base.httpRequest",
"onError": "continueRegularOutput",
"position": [
-380,
460
],
"parameters": {
"url": "={{ $json.url }}",
"options": {}
},
"typeVersion": 4.2,
"alwaysOutputData": false
},
{
"id": "12f662a8-c55f-409a-b381-f37ab6dd3794",
"name": "Loop over URLs",
"type": "n8n-nodes-base.splitInBatches",
"onError": "continueErrorOutput",
"position": [
-580,
380
],
"parameters": {
"options": {
"reset": false
}
},
"typeVersion": 3
},
{
"id": "e6957d05-3533-48ae-9cc1-ee4ac026a2a6",
"name": "Loop over pages",
"type": "n8n-nodes-base.splitInBatches",
"onError": "continueErrorOutput",
"position": [
-360,
120
],
"parameters": {
"options": {}
},
"typeVersion": 3,
"alwaysOutputData": false
},
{
  "id": "018621c0-0ea9-4865-b110-b6d0727f0588",
  "name": "Scrape emails from page",
  "type": "n8n-nodes-base.code",
  "onError": "continueRegularOutput",
  "position": [
    -200,
    220
  ],
  "parameters": {
    "mode": "runOnceForEachItem",
    "jsCode": "// Page body from the web page request; absent when that request failed,\n// because the request node continues on error.\nconst data = $json.data\n// The negative lookahead drops matches whose last label starts with an image\n// extension, e.g. file names such as logo@2x.png.\nconst emailRegex = /[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.(?!png|jpg|gif|jpeg)[a-zA-Z]{2,}/g\n// match() returns null when nothing matches; always emit an array so the\n// downstream Aggregate node can merge the lists.\nconst emails = (typeof data === 'string' ? data.match(emailRegex) : null) || []\nreturn {json: {emails: emails}}"
  },
  "typeVersion": 2
},
{
"id": "5509b8e2-a6fc-4fbe-bbc5-1bc1d5de1c98",
"name": "Aggregate arrays of emails",
"type": "n8n-nodes-base.aggregate",
"position": [
-40,
100
],
"parameters": {
"options": {
"mergeLists": true
},
"fieldsToAggregate": {
"fieldToAggregate": [
{
"fieldToAggregate": "emails"
}
]
}
},
"typeVersion": 1
},
{
"id": "f1f01f03-b62e-453f-b938-ffe4f9b3f4de",
"name": "Split out into default data structure",
"type": "n8n-nodes-base.splitOut",
"position": [
180,
100
],
"parameters": {
"options": {},
"fieldToSplitOut": "emails"
},
"typeVersion": 1
},
{
"id": "ec27d665-d9c1-4f10-9c52-0d5ea89cbf77",
"name": "Remove duplicate emails",
"type": "n8n-nodes-base.removeDuplicates",
"position": [
400,
100
],
"parameters": {
"compare": "selectedFields",
"options": {},
"fieldsToCompare": "emails"
},
"typeVersion": 1.1
},
{
  "id": "4a071bf0-23ad-455b-b231-bafd3b32e4f8",
  "name": "Filter irrelevant emails",
  "type": "n8n-nodes-base.filter",
  "position": [
    600,
    100
  ],
  "parameters": {
    "options": {},
    "conditions": {
      "options": {
        "version": 2,
        "leftValue": "",
        "caseSensitive": true,
        "typeValidation": "strict"
      },
      "combinator": "and",
      "conditions": [
        {
          "id": "041797f2-2fe2-41dc-902a-d34050b9b304",
          "operator": {
            "type": "string",
            "operation": "notRegex"
          },
          "leftValue": "={{ $json.emails }}",
          "rightValue": "=(google|gstatic|ggpht|schema\\.org|example\\.com|sentry\\.wixpress\\.com|sentry-next\\.wixpress\\.com|ingest\\.sentry\\.io|sentry\\.io|imli\\.com)"
        }
      ]
    }
  },
  "typeVersion": 2.2
},
{
"id": "59675faa-2b0d-4ba5-82c7-dc5dedcad31e",
"name": "Save emails to Google Sheet",
"type": "n8n-nodes-base.googleSheets",
"position": [
800,
100
],
"parameters": {
"columns": {
"value": {
"Emails": "={{ $json.emails }}"
},
"schema": [
{
"id": "Emails",
"type": "string",
"display": true,
"removed": false,
"required": false,
"displayName": "Emails",
"defaultMatch": false,
"canBeUsedToMatch": true
}
],
"mappingMode": "defineBelow",
"matchingColumns": [
"Emails"
]
},
"options": {},
"operation": "append"
},
"typeVersion": 4.5
},
{
"id": "93437e8b-4f8d-40a1-9585-cab1b556164a",
"name": "Starts scraper workflow",
"type": "n8n-nodes-base.executeWorkflowTrigger",
"position": [
-1600,
380
],
"parameters": {},
"typeVersion": 1
},
{
"id": "eed77477-777d-450d-a975-4d2848b1cf55",
"name": "Run workflow",
"type": "n8n-nodes-base.manualTrigger",
"position": [
-1320,
-100
],
"parameters": {},
"typeVersion": 1
},
{
"id": "dffaf04e-d1d2-4002-9a69-f0904b61fc2d",
"name": "Wait between executions",
"type": "n8n-nodes-base.wait",
"position": [
-700,
0
],
"webhookId": "40eb11a9-0f7d-4932-993e-0052b69dbf9b",
"parameters": {
"amount": 2
},
"typeVersion": 1.1
},
{
"id": "18787007-1d11-41b9-89c3-d5f69756eda7",
"name": "Execute scraper for query",
"type": "n8n-nodes-base.executeWorkflow",
"position": [
-880,
0
],
"parameters": {
"mode": "each",
"options": {
"waitForSubWorkflow": false
},
"workflowId": {
"__rl": true,
"mode": "id",
"value": "={{ $workflow.id }}"
}
},
"typeVersion": 1.1
},
{
  "id": "67fcde25-05e4-437c-b799-4448baea7891",
  "name": "Sticky Note",
  "type": "n8n-nodes-base.stickyNote",
  "position": [
    -2280,
    -140
  ],
  "parameters": {
    "color": 5,
    "width": 740,
    "height": 180,
    "content": "## 🛠 Setup\n1. Setup your list of queries in the \"Run workflow\" manual trigger node. Watch this [video](https://youtu.be/HaiO-UeiKBA) on how to generate the queries with ChatGPT.\n3. Choose a sheet to populate with data in the **Google Sheets node**\n4. Run the workflow and start getting leads into your Google Sheets document"
  },
  "typeVersion": 1
},
{
  "id": "ac880457-44b4-4ff7-8440-b4107f8468bb",
  "name": "Sticky Note2",
  "type": "n8n-nodes-base.stickyNote",
  "position": [
    -700,
    -120
  ],
  "parameters": {
    "color": 6,
    "height": 100,
    "content": "**Optional** 👇\nSet wait time between each query workflow execution. Default is 2 seconds."
  },
  "typeVersion": 1
},
{
  "id": "d83afb3d-7b71-4b47-9b50-28837aac408c",
  "name": "Sticky Note3",
  "type": "n8n-nodes-base.stickyNote",
  "position": [
    -1600,
    260
  ],
  "parameters": {
    "width": 480,
    "height": 100,
    "content": "### Scraper 👇\nThis workflow will be executed in the background for each query. Click the **All executions** tab in the left sidebar to see the executions running."
  },
  "typeVersion": 1
},
{
  "id": "007b621a-3d41-4358-aa45-560a3c8e3414",
  "name": "Sticky Note4",
  "type": "n8n-nodes-base.stickyNote",
  "position": [
    820,
    300
  ],
  "parameters": {
    "color": 4,
    "height": 180,
    "content": "👆\n1. Setup your **credentials**. Here's a [video tutorial](https://youtu.be/O5RnWDM27M8) on how to do that.\n2. Choose which document and sheet to save the scraped emails to. "
  },
  "typeVersion": 1
},
{
  "id": "fc0b837f-624c-4d25-8ed7-f787f76c785b",
  "name": "Sticky Note5",
  "type": "n8n-nodes-base.stickyNote",
  "position": [
    -1760,
    -360
  ],
  "parameters": {
    "color": 3,
    "content": " ## ⚠️ Note\nA [video tutorial](https://youtu.be/HaiO-UeiKBA) for this workflow guide is available on my [Youtube channel](https://www.youtube.com/channel/UCn8xmUBunez1SsDVRfZDUGA)"
  },
  "typeVersion": 1
},
{
  "id": "2f8665d5-2890-4f7d-908b-9c09a66b6c93",
  "name": "Sticky Note6",
  "type": "n8n-nodes-base.stickyNote",
  "position": [
    -2280,
    -360
  ],
  "parameters": {
    "color": 7,
    "width": 480,
    "height": 140,
    "content": "## Google Maps Automatic Email Scraper\nThis workflow automatically scrapes emails from businesses on Google Maps based on a list of queries that you provide."
  },
  "typeVersion": 1
},
{
  "id": "7414b2ed-259d-47da-bbd1-d9ce0d64d43c",
  "name": "Sticky Note7",
  "type": "n8n-nodes-base.stickyNote",
  "position": [
    -1000,
    540
  ],
  "parameters": {
    "color": 6,
    "width": 160,
    "height": 100,
    "content": "**Optional** 👆\nAdd or change the regex for filtering irrelevant URLs."
  },
  "typeVersion": 1
},
{
  "id": "789c9a02-e6e7-4ea6-a7a2-acc7715b377a",
  "name": "Sticky Note8",
  "type": "n8n-nodes-base.stickyNote",
  "position": [
    580,
    260
  ],
  "parameters": {
    "color": 6,
    "width": 200,
    "height": 100,
    "content": "**Optional** 👆\nAdd or change the regex for filtering irrelevant/incorrect email addresses."
  },
  "typeVersion": 1
}
],
"active": false,
"pinData": {
"Run workflow": [
{
"json": {
"query": "hollywood+dentist"
}
},
{
"json": {
"query": "downtown+los+angeles+dentist"
}
},
{
"json": {
"query": "santa+monica+dentist"
}
},
{
"json": {
"query": "westwood+dentist"
}
},
{
"json": {
"query": "west+l.a.+dentist"
}
},
{
"json": {
"query": "the+valley+dentist"
}
},
{
"json": {
"query": "echo+park+dentist"
}
},
{
"json": {
"query": "culver+city+dentist"
}
},
{
"json": {
"query": "pasadena+dentist"
}
},
{
"json": {
"query": "silver+lake+dentist"
}
},
{
"json": {
"query": "mid-wilshire+dentist"
}
},
{
"json": {
"query": "beverly+hills+dentist"
}
},
{
"json": {
"query": "north+hills+dentist"
}
},
{
"json": {
"query": "south+los+angeles+dentist"
}
}
]
},
"settings": {
"executionOrder": "v1"
},
"connections": {
"Run workflow": {
"main": [
[
{
"node": "Loop over queries",
"type": "main",
"index": 0
}
]
]
},
"Loop over URLs": {
"main": [
[
{
"node": "Loop over pages",
"type": "main",
"index": 0
}
],
[
{
"node": "Request web page for URL",
"type": "main",
"index": 0
}
]
]
},
"Loop over pages": {
"main": [
[
{
"node": "Aggregate arrays of emails",
"type": "main",
"index": 0
}
],
[
{
"node": "Scrape emails from page",
"type": "main",
"index": 0
}
]
]
},
"Loop over queries": {
"main": [
[],
[
{
"node": "Execute scraper for query",
"type": "main",
"index": 0
}
]
]
},
"Remove Duplicate URLs": {
"main": [
[
{
"node": "Loop over URLs",
"type": "main",
"index": 0
}
]
]
},
"Filter irrelevant URLs": {
"main": [
[
{
"node": "Remove Duplicate URLs",
"type": "main",
"index": 0
}
]
]
},
"Remove duplicate emails": {
"main": [
[
{
"node": "Filter irrelevant emails",
"type": "main",
"index": 0
}
]
]
},
"Scrape emails from page": {
"main": [
[
{
"node": "Loop over pages",
"type": "main",
"index": 0
}
]
]
},
"Starts scraper workflow": {
"main": [
[
{
"node": "Search Google Maps with query",
"type": "main",
"index": 0
}
]
]
},
"Wait between executions": {
"main": [
[
{
"node": "Loop over queries",
"type": "main",
"index": 0
}
]
]
},
"Filter irrelevant emails": {
"main": [
[
{
"node": "Save emails to Google Sheet",
"type": "main",
"index": 0
}
]
]
},
"Request web page for URL": {
"main": [
[
{
"node": "Loop over URLs",
"type": "main",
"index": 0
}
]
]
},
"Scrape URLs from results": {
"main": [
[
{
"node": "Filter irrelevant URLs",
"type": "main",
"index": 0
}
]
]
},
"Execute scraper for query": {
"main": [
[
{
"node": "Wait between executions",
"type": "main",
"index": 0
}
]
]
},
"Aggregate arrays of emails": {
"main": [
[
{
"node": "Split out into default data structure",
"type": "main",
"index": 0
}
]
]
},
"Search Google Maps with query": {
"main": [
[
{
"node": "Scrape URLs from results",
"type": "main",
"index": 0
}
]
]
},
"Split out into default data structure": {
"main": [
[
{
"node": "Remove duplicate emails",
"type": "main",
"index": 0
}
]
]
}
}
}
功能特点
- 按查询列表批量搜索 Google Maps
- 从搜索结果中提取网站 URL,并过滤、去重
- 逐个请求网页并用正则表达式提取邮箱地址
- 对邮箱去重并过滤无关地址
- 将结果追加保存到 Google Sheets
技术分析
节点类型及作用
- removeDuplicates:去除重复的 URL 和邮箱
- splitInBatches:循环处理查询、URL 和页面
- httpRequest:请求 Google Maps 搜索页和商家网页
- code:用正则表达式提取 URL 和邮箱
- filter:过滤无关的 URL 和邮箱
复杂度评估
配置难度:
维护难度:
扩展性:
实施指南
前置条件
- 有效的Google账户(用于Google Sheets)
- n8n平台访问权限
- Google Sheets API凭证
- 用于保存结果的Google Sheets文档和工作表
配置步骤
- 在n8n中导入工作流JSON文件
- 配置Google Sheets节点的认证信息,并选择要写入的文档和工作表
- 在“Run workflow”手动触发节点中设置查询列表
- 按需调整用于过滤URL和邮箱的正则表达式
- 测试工作流执行
- 配置定时触发器(可选)
关键参数
| 参数名称 | 默认值 | 说明 |
|---|---|---|
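| amount | 2 | “Wait between executions”节点:每次查询子工作流之间的等待时间(秒) |
| waitForSubWorkflow | false | “Execute scraper for query”节点:不等待子工作流结束,在后台执行 |
| mergeLists | true | “Aggregate arrays of emails”节点:把各页面的邮箱数组合并为一个列表 |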
最佳实践
优化建议
- 定期更新AI分类模型以提高准确性
- 根据邮件量调整处理批次大小
- 设置合理的分类置信度阈值
- 定期清理过期的分类规则
安全注意事项
- 妥善保管API密钥和认证信息
- 限制工作流的访问权限
- 定期审查处理日志
- 启用双因素认证保护Gmail账户
性能优化
- 使用增量处理减少重复工作
- 缓存频繁访问的数据
- 并行处理多个邮件分类任务
- 监控系统资源使用情况
故障排除
常见问题
邮件未被正确分类
检查AI分类器的置信度阈值设置,适当降低阈值或更新训练数据。
Gmail认证失败
确认Google API凭证有效且具有正确的权限范围,重新进行OAuth授权。
调试技巧
- 启用详细日志记录查看每个步骤的执行情况
- 使用测试邮件验证分类逻辑
- 检查网络连接和API服务状态
- 逐步执行工作流定位问题节点
错误处理
工作流包含以下错误处理机制:
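- “Request web page for URL”节点的 onError 设为 continueRegularOutput:单个网页请求失败时继续执行
- “Scrape emails from page”节点的 onError 设为 continueRegularOutput:单个页面提取出错时继续执行
- “Loop over URLs”和“Loop over pages”节点的 onError 设为 continueErrorOutput
- 工作流未配置自动重试或回滚;请求失败的页面不会产生邮箱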